set.seed(1)
required_packages <- c("tidyverse", "magrittr", "DBI", "bigrquery", "arrow","glue", "vroom","janitor", "gt", "ggwordcloud", "readxl", "ggthemes", "hrbrthemes", "extrafont", "plotly", "scales", "stringr", "gganimate", "here", "tidytext", "sentimentr", "scales", "DT", "here", "sm", "mblm", "glue", "fs", "knitr", "rmdformats", "janitor", "urltools", "colorspace", "pdftools", "showtext", "pander", "wordcloud2", "stopwords", "magicfor", "gapminder", "arrow")
for(i in required_packages) {
if(!require(i, character.only = T)) {
# if package is not existing, install then load the package
install.packages(i, dependencies = T)
require(i, character.only = T)
}
}
panderOptions('table.alignment.default', "left")
## quality of png's
dpi <- 750
## theme updates; please adjust to client´s website
#theme_set(ggthemes::theme_clean(base_size = 15))
theme_set(ggthemes::theme_clean(base_size = 15))
theme_update(plot.margin = margin(30, 30, 30, 30),
plot.background = element_rect(color = "white",
fill = "white"),
plot.title = element_text(size = 20,
face = "bold",
lineheight = 1.05,
hjust = .5,
margin = margin(10, 0, 25, 0)),
plot.title.position = "plot",
plot.caption = element_text(color = "grey40",
size = 9,
margin = margin(20, 0, -20, 0)),
plot.caption.position = "plot",
axis.line.x = element_line(color = "black",
size = .8),
axis.line.y = element_line(color = "black",
size = .8),
axis.title.x = element_text(size = 16,
face = "bold",
margin = margin(t = 20)),
axis.title.y = element_text(size = 16,
face = "bold",
margin = margin(r = 20)),
axis.text = element_text(size = 11,
color = "black",
face = "bold"),
axis.text.x = element_text(margin = margin(t = 10)),
axis.text.y = element_text(margin = margin(r = 10)),
axis.ticks = element_blank(),
panel.grid.major.x = element_line(size = .6,
color = "#eaeaea",
linetype = "solid"),
panel.grid.major.y = element_line(size = .6,
color = "#eaeaea",
linetype = "solid"),
panel.grid.minor.x = element_line(size = .6,
color = "#eaeaea",
linetype = "solid"),
panel.grid.minor.y = element_blank(),
panel.spacing.x = unit(4, "lines"),
panel.spacing.y = unit(2, "lines"),
legend.position = "top",
legend.title = element_text(,
color = "black",
size = 14,
margin = margin(5, 0, 5, 0)),
legend.text = element_text(
color = "black",
size = 11,
margin = margin(4.5, 4.5, 4.5, 4.5)),
legend.background = element_rect(fill = NA,
color = NA),
legend.key = element_rect(color = NA, fill = NA),
#legend.key.width = unit(5, "lines"),
#legend.spacing.x = unit(.05, "pt"),
#legend.spacing.y = unit(.55, "pt"),
#legend.margin = margin(0, 0, 10, 0),
strip.text = element_text(face = "bold",
margin = margin(b = 10)))
## theme settings for flipped plots
theme_flip <-
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_line(size = .6,
color = "#eaeaea"))
## theme settings for charts without y axis
theme_blank <-
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
axis.line.y = element_blank(),
axis.text.y = element_blank())
## numeric format for labels
num_format <- scales::format_format(big.mark = ",", small.mark = ",", scientific = F)
## main color backlinko
bl_col <- "#00d188"
bl_dark <- darken(bl_col, .3, space = "HLS")
Without 0 volume included
get_count_range <- function(lower, higher)
{
sql <- glue(
"SELECT COUNT(*) AS count, SUM(COALESCE(keyword_info_search_volume / 10000, 0)) AS volume
FROM `dataforseo-bigquery.dataforseo_data.keyword_data`
WHERE location = 2840
AND spell = ''
AND keyword_info_search_volume > {lower}
AND keyword_info_search_volume <= {higher}")
tb <- bq_project_query("dataforseo-bigquery", sql)
list(count = bq_table_download(tb)$count, volume = bq_table_download(tb)$volume)
}
df <- tribble(
~cat, ~count,
"1 - 10", get_count_range(0, 10),
"10 - 100", get_count_range(10, 100),
"100 - 1000", get_count_range(100, 1000),
"1000 - 10K", get_count_range(1000, 10000),
"10K - 100K", get_count_range(10000, "100000"),
"100K+", get_count_range("100001", "100000000000"))
cat6 <- df %>% unnest_wider(count) %>%
mutate(volume_prop = volume / sum(volume),
count_prop = count / sum(count)) %>%
mutate(cat = factor(cat, levels = c("1 - 10", "10 - 100", "100 - 1000", "1000 - 10K",
"10K - 100K", "100K+")))
cat6 %>%
ggplot(aes(x = cat, y = volume_prop)) +
geom_bar(stat = "identity", fill = bl_dark, color = "black", width = .85) +
geom_text(aes(label = glue::glue("{format(round(volume_prop, 3) * 100, scientific = FALSE)}%")),
nudge_y = .04, family = "Montserrat", fontface = "bold", color = "grey40", size = 3) +
theme(panel.grid.major.x = element_blank()) +
labs(x = "Search volume category", y = "Percentage of all search volume", title = "Volume of searches") +
scale_y_continuous(labels = scales::percent, limits = c(0, 0.7), expand = c(0.001, 0.001))

cat6 %>%
ggplot(aes(x = cat, y = count_prop)) +
geom_bar(stat = "identity", fill = bl_dark, color = "black", width = .85) +
geom_text(aes(label = glue::glue("{format(round(count_prop, 3) * 100, scientific = FALSE)}%")),
nudge_y = .04, family = "Montserrat", fontface = "bold", color = "grey40", size = 3) +
theme(panel.grid.major.x = element_blank()) +
labs(x = "Search volume category", y = "Percentage of all searches", title = "Number of searches") +
scale_y_continuous(labels = scales::percent, limits = c(0, 0.7), expand = c(0.001, 0.001))

cat4 <- cat6 %>% mutate(cat =
case_when(
cat == "1 - 10" ~ "1 - 100",
cat == "10 - 100" ~ "1 - 100",
cat == "100 - 1000" ~ "100 - 1000",
cat == "1000 - 10K" ~ "1000 - 10K",
cat == "10K - 100K" ~ "10K+",
cat == "100K+" ~ "10K+",
)) %>%
group_by(cat) %>%
summarise(across(everything(), sum))
cat4 %>%
ggplot(aes(x = cat, y = volume_prop)) +
geom_bar(stat = "identity", fill = bl_dark, color = "black", width = .85) +
geom_text(aes(label = glue::glue("{format(round(volume_prop, 3) * 100, scientific = FALSE)}%")),
nudge_y = .04, family = "Montserrat", fontface = "bold", color = "grey40", size = 3) +
theme(panel.grid.major.x = element_blank()) +
labs(x = "Search volume category", y = "Percentage of all search volume", title = "Volume of searches") +
scale_y_continuous(labels = scales::percent, limits = c(0, 1), expand = c(0.001, 0.001))

cat4 %>%
ggplot(aes(x = cat, y = count_prop)) +
geom_bar(stat = "identity", fill = bl_dark, color = "black", width = .85) +
geom_text(aes(label = glue::glue("{format(round(count_prop, 3) * 100, scientific = FALSE)}%")),
nudge_y = .04, family = "Montserrat", fontface = "bold", color = "grey40", size = 3) +
theme(panel.grid.major.x = element_blank()) +
labs(x = "Search volume category", y = "Percentage of all searches", title = "Number of searches") +
scale_y_continuous(labels = scales::percent, limits = c(0, 1), expand = c(0.001, 0.001))

With 0 volume included
get_count_range <- function(lower, higher)
{
sql <- glue(
"SELECT COUNT(*) AS count, SUM(COALESCE(keyword_info_search_volume / 10000, 0)) AS volume
FROM `dataforseo-bigquery.dataforseo_data.keyword_data`
WHERE location = 2840
AND spell = ''
AND keyword_info_search_volume >= {lower}
AND keyword_info_search_volume <= {higher}")
tb <- bq_project_query("dataforseo-bigquery", sql)
list(count = bq_table_download(tb)$count, volume = bq_table_download(tb)$volume)
}
df <- tribble(
~cat, ~count,
"0 - 10", get_count_range(0, 10),
"10 - 100", get_count_range(10, 100),
"100 - 1000", get_count_range(100, 1000),
"1000 - 10K", get_count_range(1000, 10000),
"10K - 100K", get_count_range(10000, "100000"),
"100K+", get_count_range("100001", "100000000000"))
cat6 <- df %>% unnest_wider(count) %>%
mutate(volume_prop = volume / sum(volume),
count_prop = count / sum(count)) %>%
mutate(cat = factor(cat, levels = c("0 - 10", "10 - 100", "100 - 1000", "1000 - 10K",
"10K - 100K", "100K+")))
cat6 %>%
ggplot(aes(x = cat, y = volume_prop)) +
geom_bar(stat = "identity", fill = bl_dark, color = "black", width = .85) +
geom_text(aes(label = glue::glue("{format(round(volume_prop, 3) * 100, scientific = FALSE)}%")),
nudge_y = .04, family = "Montserrat", fontface = "bold", color = "grey40", size = 3) +
theme(panel.grid.major.x = element_blank()) +
labs(x = "Search volume category", y = "Percentage of all search volume", title = "Volume of searches") +
scale_y_continuous(labels = scales::percent, limits = c(0, 0.7), expand = c(0.001, 0.001))

cat6 %>%
ggplot(aes(x = cat, y = count_prop)) +
geom_bar(stat = "identity", fill = bl_dark, color = "black", width = .85) +
geom_text(aes(label = glue::glue("{format(round(count_prop, 3) * 100, scientific = FALSE)}%")),
nudge_y = .04, family = "Montserrat", fontface = "bold", color = "grey40", size = 3) +
theme(panel.grid.major.x = element_blank()) +
labs(x = "Search volume category", y = "Percentage of all searches", title = "Number of searches") +
scale_y_continuous(labels = scales::percent, limits = c(0, 0.7), expand = c(0.001, 0.001))

cat4 <- cat6 %>% mutate(cat =
case_when(
cat == "0 - 10" ~ "0 - 100",
cat == "10 - 100" ~ "0 - 100",
cat == "100 - 1000" ~ "100 - 1000",
cat == "1000 - 10K" ~ "1000 - 10K",
cat == "10K - 100K" ~ "10K+",
cat == "100K+" ~ "10K+",
)) %>%
group_by(cat) %>%
summarise(across(everything(), sum))
cat4 %>%
ggplot(aes(x = cat, y = volume_prop)) +
geom_bar(stat = "identity", fill = bl_dark, color = "black", width = .85) +
geom_text(aes(label = glue::glue("{format(round(volume_prop, 3) * 100, scientific = FALSE)}%")),
nudge_y = .04, family = "Montserrat", fontface = "bold", color = "grey40", size = 3) +
theme(panel.grid.major.x = element_blank()) +
labs(x = "Search volume category", y = "Percentage of all search volume", title = "Volume of searches") +
scale_y_continuous(labels = scales::percent, limits = c(0, 1), expand = c(0.001, 0.001))

cat4 %>%
ggplot(aes(x = cat, y = count_prop)) +
geom_bar(stat = "identity", fill = bl_dark, color = "black", width = .85) +
geom_text(aes(label = glue::glue("{format(round(count_prop, 3) * 100, scientific = FALSE)}%")),
nudge_y = .04, family = "Montserrat", fontface = "bold", color = "grey40", size = 3) +
theme(panel.grid.major.x = element_blank()) +
labs(x = "Search volume category", y = "Percentage of all searches", title = "Number of searches") +
scale_y_continuous(labels = scales::percent, limits = c(0, 1), expand = c(0.001, 0.001))

con <- dbConnect(
bigrquery::bigquery(),
project = "dataforseo-bigquery",
billing = "dataforseo-bigquery"
)
sql <- glue("SELECT SUM(COALESCE(keyword_info_search_volume / 10000, 0)) AS volume
FROM `dataforseo-bigquery.dataforseo_data.keyword_data`
WHERE location = 2840
AND keyword_info_search_volume > 0
AND spell = ''")
tb <- bq_project_query("dataforseo-bigquery", sql)
top <- bq_table_download(tb)
total_volume <- top$volume * 10000
con <- dbConnect(
bigrquery::bigquery(),
project = "dataforseo-bigquery",
billing = "dataforseo-bigquery"
)
sql <- glue("SELECT * FROM `dataforseo-bigquery.dataforseo_data.keyword_data`
WHERE location = 2840
AND keyword_info_search_volume > 0
AND spell = ''
ORDER BY keyword_info_search_volume DESC
LIMIT 50000")
tb <- bq_project_query("dataforseo-bigquery", sql)
top <- bq_table_download(tb, max_results = 50000)